In [2]:
import os
import collections
import pandas as pd
import numpy as np
import functools
import matplotlib.pyplot as plt
import cv2

from sklearn import preprocessing 


import xml.etree.ElementTree as ET

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torchvision

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

from torch.utils.data import DataLoader, Dataset
from torch.utils.data import SequentialSampler

import glob
import cv2
from PIL import Image
In [3]:
BASE_PATH = "../input/drone-dataset-uav/dataset_xml_format"
TXT_PATH = glob.glob("../input/drone-dataset-uav/drone_dataset_yolo/dataset_txt/*.txt")
IMG_PATH = glob.glob("../input/drone-dataset-uav/drone_dataset_yolo/dataset_txt/*.jpg")
In [4]:
train_image = []
train_image_path = []
train_image_id = []
train_annotation = []

Convert YOLO to LIST¶

In [5]:
def convert_format(box, iwidth = 256, iheight = 256):
    x = (iwidth*box[0]) - ((box[2]*iwidth)/2)
    y = (iheight*box[1]) - ((box[3]*iheight)/2)
    o_w = box[2]*iwidth
    o_h = box[3]*iheight
    return [x, y, o_w, o_h]

Loading the Images and Annotations¶

In [6]:
for i in IMG_PATH:     #slicing for insufficient memory
    Load_image = Image.open(i).convert('RGB')
    nmpy_img = Load_image.resize((258,258))
    train_image.append(np.asarray(nmpy_img))
    train_image_path.append(i)
    
    seperate_path = i.split('.')
    train_image_id.append(seperate_path[-2].split('/')[-1])
    
    seperate_path[1] = '.txt'
    with open('../'+seperate_path[2]+seperate_path[1]) as f:
        lines = f.readlines()
        tmp_lst = lines[0].split(' ')
        #label = int(tmp_lst[0])
        startX = float(tmp_lst[1])
        startY = float(tmp_lst[2])
        endX = float(tmp_lst[3])
        endY = float(tmp_lst[4])
        
        train_annotation.append(convert_format([startX, startY, endX, endY]))
/opt/conda/lib/python3.7/site-packages/PIL/Image.py:993: UserWarning: Palette images with Transparency expressed in bytes should be converted to RGBA images
  "Palette images with Transparency expressed in bytes should be "
In [7]:
plt.figure(figsize=(20, 20))

for i in range(28):
    ax = plt.subplot(7, 7, i + 1)
    plt.imshow(train_image[i].astype("uint8"))
    plt.axis("off")

Creating Dataframe¶

In [8]:
df = pd.DataFrame()

Required Column for Model¶

In [9]:
df['boxes'] = train_annotation
df['img_path'] = train_image_path
df['labels'] = 'drone'
df['img_id'] = train_image_id
In [10]:
df
Out[10]:
boxes img_path labels img_id
0 [1.5360000000000014, 57.343999999999994, 249.3... ../input/drone-dataset-uav/drone_dataset_yolo/... drone pic_243
1 [29.333376, 39.466623999999996, 226.666752, 20... ../input/drone-dataset-uav/drone_dataset_yolo/... drone pic_772
2 [6.656000000000006, 11.519999999999996, 246.27... ../input/drone-dataset-uav/drone_dataset_yolo/... drone pic_098
3 [6.912000000000006, 38.65599999999999, 243.2, ... ../input/drone-dataset-uav/drone_dataset_yolo/... drone pic_075
4 [12.671999999999997, 9.343999999999994, 228.60... ../input/drone-dataset-uav/drone_dataset_yolo/... drone 0285
... ... ... ... ...
1354 [18.176000000000002, 35.072, 233.984, 165.888] ../input/drone-dataset-uav/drone_dataset_yolo/... drone pic_096
1355 [11.803903999999989, 54.26624, 238.294016, 118... ../input/drone-dataset-uav/drone_dataset_yolo/... drone pic_213
1356 [1.706624000000005, 69.11987200000002, 254.293... ../input/drone-dataset-uav/drone_dataset_yolo/... drone pic_484
1357 [51.88262399999999, 70.369792, 173.397248, 76.... ../input/drone-dataset-uav/drone_dataset_yolo/... drone pic_414
1358 [13.250047999999992, 73.99987200000001, 232.0,... ../input/drone-dataset-uav/drone_dataset_yolo/... drone pic_564

1359 rows × 4 columns

Encoding the Label¶

In [11]:
# classes need to be in int form so we use LabelEncoder for this task
enc = preprocessing.LabelEncoder()
df['labels'] = enc.fit_transform(df['labels'])
df['labels'] = np.stack(df['labels'][i]+1 for i in range(len(df['labels']))) 
/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3472: FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.
  if (await self.run_code(code, result,  async_=asy)):

Extracting Xmin,Xmax,Ymin,Ymax¶

In [12]:
df[['xmin','ymin','xmax','ymax']]=np.stack(df['boxes'][i] for i in range(len(df['boxes'])))

df.drop(columns=['boxes'], inplace=True)
df['xmin'] = df['xmin'].astype(np.float)
df['ymin'] = df['ymin'].astype(np.float)
df['xmax'] = df['xmax'].astype(np.float)
df['ymax'] = df['ymax'].astype(np.float)
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:4: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  after removing the cwd from sys.path.
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:5: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  """
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:6: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:7: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  import sys

Removing Mistaken Annotation¶

In [13]:
df_removed_damaged = df.drop(df[(df.ymin > df.ymax) | (df.xmin > df.xmax)].index)
In [14]:
len(df_removed_damaged['img_id'].unique())
Out[14]:
1042
In [15]:
train_ds = df_removed_damaged[0:750]
val_ds = df_removed_damaged[750:]
train_ds.shape, val_ds.shape
Out[15]:
((750, 7), (292, 7))

TrainDataset Generator¶

In [16]:
class TrainDataset(Dataset):
    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()
        
        self.image_ids = dataframe['img_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms
    
    def __getitem__(self, index: int):
        image_id = self.image_ids[index]
        records = self.df[self.df['img_id'] == image_id]
        image = cv2.imread(f'{self.image_dir}/{image_id}.jpg', cv2.IMREAD_COLOR)
        image = cv2.resize(image,(256,256))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        rows, cols = image.shape[:2]
        
        boxes = records[['xmin', 'ymin', 'xmax', 'ymax']].values
        
       
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        area = torch.as_tensor(area, dtype=torch.float32)
        
        label = records['labels'].values
        labels = torch.as_tensor(label, dtype=torch.int64)
        
        # suppose all instances are not crowd
        iscrowd = torch.zeros((records.shape[0],), dtype=torch.int64)
        
        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        # target['masks'] = None
        target['image_id'] = torch.tensor([index])
        target['area'] = area
        target['iscrowd'] = iscrowd
        
        if self.transforms:
            sample = {
                'image': image,
                'bboxes': target['boxes'],
                'labels': labels
            }
            sample = self.transforms(**sample)
            image = sample['image']
            
            target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1,0)
            
            return image, target
        
    def __len__(self) -> int:
        return self.image_ids.shape[0]

Annotation of Image¶

In [17]:
def get_transform_train():
    return A.Compose([
        A.Flip(0.5),
        A.ShiftScaleRotate(scale_limit=0.1, rotate_limit=45, p=0.25),
        A.LongestMaxSize(max_size=800, p=1.0),

        # FasterRCNN will normalize.
        A.Normalize(mean=(0, 0, 0), std=(1, 1, 1), max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0)
    ], bbox_params={'format':'pascal_voc', 'label_fields': ['labels']})

def get_transform_valid():
    return A.Compose([
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields':['labels']})

Generating Train and Validation data¶

In [18]:
def collate_fn(batch):
    return tuple(zip(*batch))

train_dataset = TrainDataset(train_ds, '../input/drone-dataset-uav/drone_dataset_yolo/dataset_txt', get_transform_train())
valid_dataset = TrainDataset(val_ds, '../input/drone-dataset-uav/drone_dataset_yolo/dataset_txt', get_transform_valid())


train_data_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=4,
    collate_fn=collate_fn
)

valid_data_loader = DataLoader(
    valid_dataset,
    batch_size=4,
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn
)
/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py:490: UserWarning: This DataLoader will create 4 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
  cpuset_checked))
In [19]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
In [20]:
device
Out[20]:
device(type='cuda')
In [21]:
images, targets= next(iter(train_data_loader))
images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

No of Classes¶

In [22]:
classes= {1:'drone'}
In [23]:
images, targets= next(iter(train_data_loader))
images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

plt.figure(figsize=(20,20))
for i, (image, target) in enumerate(zip(images, targets)):
    plt.subplot(6,4, i+1)
    boxes = targets[i]['boxes'].cpu().numpy().astype(np.int32)
    sample = images[i].permute(1,2,0).cpu().numpy() * 255
    names = targets[i]['labels'].cpu().numpy().astype(np.int64)
    for i,box in enumerate(boxes):
        cv2.rectangle(sample,
                      (box[0], box[1]),
                      (box[2], box[3]),
                      (0, 0, 220), 2)
        cv2.putText(sample, classes[names[i]], (box[0],box[1]+15),cv2.FONT_HERSHEY_COMPLEX ,0.5,(0,220,0),1,cv2.LINE_AA)  

    plt.axis('off')
    plt.imshow(sample)

Main Model¶

In [24]:
# load a model; pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
  0%|          | 0.00/160M [00:00<?, ?B/s]

Preparing Faster-RCNN¶

In [25]:
num_classes = 2

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

Model Parameters¶

In [26]:
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

Install Required Modules¶

In [27]:
!pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
Collecting git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI
  Cloning https://github.com/cocodataset/cocoapi.git to /tmp/pip-req-build-j9dgah21
  Running command git clone --filter=blob:none --quiet https://github.com/cocodataset/cocoapi.git /tmp/pip-req-build-j9dgah21
  Resolved https://github.com/cocodataset/cocoapi.git to commit 8c9bcc3cf640524c4c20a9c40e89cb6a2f2fa0e9
  Preparing metadata (setup.py) ... done
Requirement already satisfied: setuptools>=18.0 in /opt/conda/lib/python3.7/site-packages (from pycocotools==2.0) (59.8.0)
Requirement already satisfied: cython>=0.27.3 in /opt/conda/lib/python3.7/site-packages (from pycocotools==2.0) (0.29.30)
Requirement already satisfied: matplotlib>=2.1.0 in /opt/conda/lib/python3.7/site-packages (from pycocotools==2.0) (3.5.2)
Requirement already satisfied: cycler>=0.10 in /opt/conda/lib/python3.7/site-packages (from matplotlib>=2.1.0->pycocotools==2.0) (0.11.0)
Requirement already satisfied: kiwisolver>=1.0.1 in /opt/conda/lib/python3.7/site-packages (from matplotlib>=2.1.0->pycocotools==2.0) (1.4.3)
Requirement already satisfied: fonttools>=4.22.0 in /opt/conda/lib/python3.7/site-packages (from matplotlib>=2.1.0->pycocotools==2.0) (4.33.3)
Requirement already satisfied: pillow>=6.2.0 in /opt/conda/lib/python3.7/site-packages (from matplotlib>=2.1.0->pycocotools==2.0) (9.1.1)
Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.7/site-packages (from matplotlib>=2.1.0->pycocotools==2.0) (21.3)
Requirement already satisfied: pyparsing>=2.2.1 in /opt/conda/lib/python3.7/site-packages (from matplotlib>=2.1.0->pycocotools==2.0) (3.0.9)
Requirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.7/site-packages (from matplotlib>=2.1.0->pycocotools==2.0) (1.21.6)
Requirement already satisfied: python-dateutil>=2.7 in /opt/conda/lib/python3.7/site-packages (from matplotlib>=2.1.0->pycocotools==2.0) (2.8.2)
Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.1.0->pycocotools==2.0) (4.1.1)
Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.7/site-packages (from python-dateutil>=2.7->matplotlib>=2.1.0->pycocotools==2.0) (1.16.0)
Building wheels for collected packages: pycocotools
  Building wheel for pycocotools (setup.py) ... done
  Created wheel for pycocotools: filename=pycocotools-2.0-cp37-cp37m-linux_x86_64.whl size=371257 sha256=2217a0c835770587f1f3a56eddde3a32f5a3ba8e2cb05e3376cbe0bf30859c9b
  Stored in directory: /tmp/pip-ephem-wheel-cache-37cpo0pd/wheels/e2/6b/1d/344ac773c7495ea0b85eb228bc66daec7400a143a92d36b7b1
Successfully built pycocotools
Installing collected packages: pycocotools
Successfully installed pycocotools-2.0
WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
In [28]:
!git clone https://github.com/pytorch/vision.git
    
!cd vision;cp references/detection/utils.py ../;cp references/detection/transforms.py ../;cp references/detection/coco_eval.py ../;cp references/detection/engine.py ../;cp references/detection/coco_utils.py ../
Cloning into 'vision'...
remote: Enumerating objects: 271642, done.
remote: Counting objects: 100% (10843/10843), done.
remote: Compressing objects: 100% (527/527), done.
remote: Total 271642 (delta 10331), reused 10781 (delta 10295), pack-reused 260799
Receiving objects: 100% (271642/271642), 546.20 MiB | 35.75 MiB/s, done.
Resolving deltas: 100% (248082/248082), done.
In [29]:
from engine import train_one_epoch, evaluate
import utils
In [30]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

Model Training¶

In [31]:
# let's train it for 20 epochs
num_epochs = 20

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, valid_data_loader, device=device)
Epoch: [0]  [ 0/47]  eta: 0:09:50  lr: 0.000114  loss: 0.5547 (0.5547)  loss_classifier: 0.3780 (0.3780)  loss_box_reg: 0.1097 (0.1097)  loss_objectness: 0.0546 (0.0546)  loss_rpn_box_reg: 0.0124 (0.0124)  time: 12.5632  data: 3.1794  max mem: 11511
Epoch: [0]  [10/47]  eta: 0:01:47  lr: 0.001199  loss: 0.5922 (0.7587)  loss_classifier: 0.2770 (0.2817)  loss_box_reg: 0.1041 (0.1115)  loss_objectness: 0.1452 (0.3044)  loss_rpn_box_reg: 0.0356 (0.0611)  time: 2.9028  data: 0.3530  max mem: 11611
Epoch: [0]  [20/47]  eta: 0:01:06  lr: 0.002285  loss: 0.4728 (0.6124)  loss_classifier: 0.1571 (0.2082)  loss_box_reg: 0.1041 (0.1102)  loss_objectness: 0.1067 (0.2413)  loss_rpn_box_reg: 0.0245 (0.0526)  time: 1.9641  data: 0.0799  max mem: 11611
Epoch: [0]  [30/47]  eta: 0:00:39  lr: 0.003371  loss: 0.3332 (0.5656)  loss_classifier: 0.1098 (0.1741)  loss_box_reg: 0.0941 (0.1061)  loss_objectness: 0.0873 (0.2214)  loss_rpn_box_reg: 0.0224 (0.0639)  time: 1.9859  data: 0.0901  max mem: 11611
Epoch: [0]  [40/47]  eta: 0:00:15  lr: 0.004457  loss: 0.2637 (0.5074)  loss_classifier: 0.0948 (0.1518)  loss_box_reg: 0.0921 (0.1004)  loss_objectness: 0.0411 (0.1891)  loss_rpn_box_reg: 0.0408 (0.0661)  time: 1.9727  data: 0.0864  max mem: 11611
Epoch: [0]  [46/47]  eta: 0:00:02  lr: 0.005000  loss: 0.2479 (0.4689)  loss_classifier: 0.0772 (0.1418)  loss_box_reg: 0.0850 (0.0988)  loss_objectness: 0.0366 (0.1681)  loss_rpn_box_reg: 0.0283 (0.0601)  time: 1.9410  data: 0.0865  max mem: 11611
Epoch: [0] Total time: 0:01:42 (2.1833 s / it)
creating index...
index created!
Test:  [ 0/73]  eta: 0:01:07  model_time: 0.2800 (0.2800)  evaluator_time: 0.0989 (0.0989)  time: 0.9240  data: 0.5428  max mem: 11611
Test:  [72/73]  eta: 0:00:00  model_time: 0.1786 (0.1825)  evaluator_time: 0.0251 (0.0332)  time: 0.2394  data: 0.0169  max mem: 11611
Test: Total time: 0:00:17 (0.2451 s / it)
Averaged stats: model_time: 0.1786 (0.1825)  evaluator_time: 0.0251 (0.0332)
Accumulating evaluation results...
DONE (t=0.28s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.002
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.011
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.010
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.003
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.061
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.257
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.119
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.327
Epoch: [1]  [ 0/47]  eta: 0:04:48  lr: 0.005000  loss: 0.2349 (0.2349)  loss_classifier: 0.0767 (0.0767)  loss_box_reg: 0.0960 (0.0960)  loss_objectness: 0.0301 (0.0301)  loss_rpn_box_reg: 0.0321 (0.0321)  time: 6.1348  data: 3.8054  max mem: 11611
Epoch: [1]  [10/47]  eta: 0:01:28  lr: 0.005000  loss: 0.2043 (0.2897)  loss_classifier: 0.0638 (0.0642)  loss_box_reg: 0.0729 (0.0778)  loss_objectness: 0.0301 (0.0796)  loss_rpn_box_reg: 0.0273 (0.0681)  time: 2.3896  data: 0.4159  max mem: 11611
Epoch: [1]  [20/47]  eta: 0:00:59  lr: 0.005000  loss: 0.2364 (0.3125)  loss_classifier: 0.0607 (0.0614)  loss_box_reg: 0.0725 (0.0735)  loss_objectness: 0.0515 (0.0976)  loss_rpn_box_reg: 0.0278 (0.0800)  time: 2.0039  data: 0.0837  max mem: 11612
Epoch: [1]  [30/47]  eta: 0:00:36  lr: 0.005000  loss: 0.1833 (0.2720)  loss_classifier: 0.0522 (0.0582)  loss_box_reg: 0.0659 (0.0699)  loss_objectness: 0.0342 (0.0811)  loss_rpn_box_reg: 0.0278 (0.0628)  time: 1.9804  data: 0.0871  max mem: 11612
Epoch: [1]  [40/47]  eta: 0:00:14  lr: 0.005000  loss: 0.1833 (0.2600)  loss_classifier: 0.0515 (0.0570)  loss_box_reg: 0.0633 (0.0688)  loss_objectness: 0.0342 (0.0768)  loss_rpn_box_reg: 0.0225 (0.0574)  time: 1.9765  data: 0.0851  max mem: 11612
Epoch: [1]  [46/47]  eta: 0:00:02  lr: 0.005000  loss: 0.1856 (0.2514)  loss_classifier: 0.0529 (0.0564)  loss_box_reg: 0.0655 (0.0688)  loss_objectness: 0.0326 (0.0718)  loss_rpn_box_reg: 0.0232 (0.0543)  time: 1.9431  data: 0.0827  max mem: 11612
Epoch: [1] Total time: 0:01:36 (2.0633 s / it)
creating index...
index created!
Test:  [ 0/73]  eta: 0:01:01  model_time: 0.2666 (0.2666)  evaluator_time: 0.0603 (0.0603)  time: 0.8379  data: 0.5086  max mem: 11612
Test:  [72/73]  eta: 0:00:00  model_time: 0.1789 (0.1820)  evaluator_time: 0.0236 (0.0260)  time: 0.2213  data: 0.0171  max mem: 11612
Test: Total time: 0:00:17 (0.2365 s / it)
Averaged stats: model_time: 0.1789 (0.1820)  evaluator_time: 0.0236 (0.0260)
Accumulating evaluation results...
DONE (t=0.26s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.004
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.017
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.013
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.011
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.094
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.279
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.142
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.349
Epoch: [2]  [ 0/47]  eta: 0:04:48  lr: 0.005000  loss: 0.1632 (0.1632)  loss_classifier: 0.0522 (0.0522)  loss_box_reg: 0.0535 (0.0535)  loss_objectness: 0.0287 (0.0287)  loss_rpn_box_reg: 0.0288 (0.0288)  time: 6.1413  data: 3.9004  max mem: 11612
Epoch: [2]  [10/47]  eta: 0:01:28  lr: 0.005000  loss: 0.1778 (0.2289)  loss_classifier: 0.0502 (0.0507)  loss_box_reg: 0.0609 (0.0620)  loss_objectness: 0.0361 (0.0634)  loss_rpn_box_reg: 0.0286 (0.0528)  time: 2.3975  data: 0.4472  max mem: 11612
Epoch: [2]  [20/47]  eta: 0:00:59  lr: 0.005000  loss: 0.1667 (0.2017)  loss_classifier: 0.0493 (0.0496)  loss_box_reg: 0.0588 (0.0594)  loss_objectness: 0.0366 (0.0508)  loss_rpn_box_reg: 0.0241 (0.0419)  time: 2.0059  data: 0.0953  max mem: 11612
Epoch: [2]  [30/47]  eta: 0:00:36  lr: 0.005000  loss: 0.1584 (0.1922)  loss_classifier: 0.0479 (0.0487)  loss_box_reg: 0.0562 (0.0591)  loss_objectness: 0.0348 (0.0480)  loss_rpn_box_reg: 0.0198 (0.0364)  time: 1.9912  data: 0.0891  max mem: 11612
Epoch: [2]  [40/47]  eta: 0:00:14  lr: 0.005000  loss: 0.1696 (0.2020)  loss_classifier: 0.0467 (0.0493)  loss_box_reg: 0.0595 (0.0594)  loss_objectness: 0.0322 (0.0509)  loss_rpn_box_reg: 0.0214 (0.0424)  time: 1.9918  data: 0.0893  max mem: 11612
Epoch: [2]  [46/47]  eta: 0:00:02  lr: 0.005000  loss: 0.1770 (0.2080)  loss_classifier: 0.0493 (0.0492)  loss_box_reg: 0.0593 (0.0593)  loss_objectness: 0.0373 (0.0552)  loss_rpn_box_reg: 0.0214 (0.0443)  time: 1.9500  data: 0.0886  max mem: 11612
Epoch: [2] Total time: 0:01:37 (2.0738 s / it)
creating index...
index created!
Test:  [ 0/73]  eta: 0:01:00  model_time: 0.2464 (0.2464)  evaluator_time: 0.0418 (0.0418)  time: 0.8314  data: 0.5411  max mem: 11612
Test:  [72/73]  eta: 0:00:00  model_time: 0.1793 (0.1842)  evaluator_time: 0.0233 (0.0257)  time: 0.2235  data: 0.0175  max mem: 11612
Test: Total time: 0:00:17 (0.2404 s / it)
Averaged stats: model_time: 0.1793 (0.1842)  evaluator_time: 0.0233 (0.0257)
Accumulating evaluation results...
DONE (t=0.24s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.005
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.022
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.015
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.012
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.112
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.276
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.116
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.356
Epoch: [3]  [ 0/47]  eta: 0:06:20  lr: 0.005000  loss: 0.5685 (0.5685)  loss_classifier: 0.0571 (0.0571)  loss_box_reg: 0.0669 (0.0669)  loss_objectness: 0.2314 (0.2314)  loss_rpn_box_reg: 0.2131 (0.2131)  time: 8.0873  data: 6.1611  max mem: 11612
Epoch: [3]  [10/47]  eta: 0:01:33  lr: 0.005000  loss: 0.1719 (0.2245)  loss_classifier: 0.0530 (0.0526)  loss_box_reg: 0.0634 (0.0622)  loss_objectness: 0.0444 (0.0611)  loss_rpn_box_reg: 0.0206 (0.0486)  time: 2.5203  data: 0.6065  max mem: 11612
Epoch: [3]  [20/47]  eta: 0:01:01  lr: 0.005000  loss: 0.1694 (0.1988)  loss_classifier: 0.0549 (0.0546)  loss_box_reg: 0.0646 (0.0655)  loss_objectness: 0.0280 (0.0432)  loss_rpn_box_reg: 0.0196 (0.0354)  time: 1.9679  data: 0.0673  max mem: 11612
Epoch: [3]  [30/47]  eta: 0:00:36  lr: 0.005000  loss: 0.1726 (0.2042)  loss_classifier: 0.0541 (0.0535)  loss_box_reg: 0.0640 (0.0647)  loss_objectness: 0.0280 (0.0485)  loss_rpn_box_reg: 0.0250 (0.0375)  time: 1.9831  data: 0.0851  max mem: 11612
Epoch: [3]  [40/47]  eta: 0:00:14  lr: 0.005000  loss: 0.1796 (0.1962)  loss_classifier: 0.0513 (0.0530)  loss_box_reg: 0.0603 (0.0640)  loss_objectness: 0.0330 (0.0454)  loss_rpn_box_reg: 0.0208 (0.0339)  time: 1.9859  data: 0.0877  max mem: 11612
Epoch: [3]  [46/47]  eta: 0:00:02  lr: 0.005000  loss: 0.1796 (0.2029)  loss_classifier: 0.0513 (0.0524)  loss_box_reg: 0.0594 (0.0629)  loss_objectness: 0.0366 (0.0501)  loss_rpn_box_reg: 0.0208 (0.0375)  time: 1.9454  data: 0.0877  max mem: 11612
Epoch: [3] Total time: 0:01:38 (2.0968 s / it)
creating index...
index created!
Test:  [ 0/73]  eta: 0:01:00  model_time: 0.2609 (0.2609)  evaluator_time: 0.0380 (0.0380)  time: 0.8310  data: 0.5296  max mem: 11612
Test:  [72/73]  eta: 0:00:00  model_time: 0.1794 (0.1860)  evaluator_time: 0.0199 (0.0229)  time: 0.2183  data: 0.0166  max mem: 11612
Test: Total time: 0:00:17 (0.2393 s / it)
Averaged stats: model_time: 0.1794 (0.1860)  evaluator_time: 0.0199 (0.0229)
Accumulating evaluation results...
DONE (t=0.23s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.006
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.026
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.016
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.017
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.128
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.265
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.120
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.337
Epoch: [4]  [ 0/47]  eta: 0:04:19  lr: 0.005000  loss: 0.2020 (0.2020)  loss_classifier: 0.0446 (0.0446)  loss_box_reg: 0.0496 (0.0496)  loss_objectness: 0.0607 (0.0607)  loss_rpn_box_reg: 0.0471 (0.0471)  time: 5.5184  data: 3.0136  max mem: 11612
Epoch: [4]  [10/47]  eta: 0:01:25  lr: 0.005000  loss: 0.1809 (0.1932)  loss_classifier: 0.0485 (0.0500)  loss_box_reg: 0.0600 (0.0602)  loss_objectness: 0.0335 (0.0463)  loss_rpn_box_reg: 0.0255 (0.0367)  time: 2.3211  data: 0.3583  max mem: 11612
Epoch: [4]  [20/47]  eta: 0:00:58  lr: 0.005000  loss: 0.1632 (0.1980)  loss_classifier: 0.0485 (0.0493)  loss_box_reg: 0.0572 (0.0576)  loss_objectness: 0.0335 (0.0522)  loss_rpn_box_reg: 0.0209 (0.0389)  time: 1.9886  data: 0.0874  max mem: 11612
Epoch: [4]  [30/47]  eta: 0:00:35  lr: 0.005000  loss: 0.1689 (0.1913)  loss_classifier: 0.0496 (0.0502)  loss_box_reg: 0.0571 (0.0594)  loss_objectness: 0.0320 (0.0450)  loss_rpn_box_reg: 0.0223 (0.0367)  time: 1.9854  data: 0.0871  max mem: 11612
Epoch: [4]  [40/47]  eta: 0:00:14  lr: 0.005000  loss: 0.1700 (0.1961)  loss_classifier: 0.0518 (0.0505)  loss_box_reg: 0.0599 (0.0597)  loss_objectness: 0.0310 (0.0485)  loss_rpn_box_reg: 0.0200 (0.0374)  time: 1.9768  data: 0.0887  max mem: 11612
Epoch: [4]  [46/47]  eta: 0:00:02  lr: 0.005000  loss: 0.1685 (0.2032)  loss_classifier: 0.0517 (0.0501)  loss_box_reg: 0.0594 (0.0591)  loss_objectness: 0.0349 (0.0519)  loss_rpn_box_reg: 0.0260 (0.0420)  time: 1.9493  data: 0.0941  max mem: 11612
Epoch: [4] Total time: 0:01:36 (2.0517 s / it)
creating index...
index created!
Test:  [ 0/73]  eta: 0:00:58  model_time: 0.3143 (0.3143)  evaluator_time: 0.0585 (0.0585)  time: 0.8078  data: 0.4328  max mem: 11612
Test:  [72/73]  eta: 0:00:00  model_time: 0.1807 (0.1862)  evaluator_time: 0.0194 (0.0220)  time: 0.2263  data: 0.0182  max mem: 11612
Test: Total time: 0:00:17 (0.2393 s / it)
Averaged stats: model_time: 0.1807 (0.1862)  evaluator_time: 0.0194 (0.0220)
Accumulating evaluation results...
DONE (t=0.20s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.006
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.027
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.017
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.019
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.128
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.264
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.104
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.343
Epoch: [5]  [ 0/47]  eta: 0:04:55  lr: 0.000500  loss: 0.1875 (0.1875)  loss_classifier: 0.0525 (0.0525)  loss_box_reg: 0.0513 (0.0513)  loss_objectness: 0.0582 (0.0582)  loss_rpn_box_reg: 0.0254 (0.0254)  time: 6.2936  data: 3.9750  max mem: 11612
Epoch: [5]  [10/47]  eta: 0:01:29  lr: 0.000500  loss: 0.1578 (0.1847)  loss_classifier: 0.0505 (0.0500)  loss_box_reg: 0.0591 (0.0580)  loss_objectness: 0.0293 (0.0413)  loss_rpn_box_reg: 0.0181 (0.0354)  time: 2.4148  data: 0.4584  max mem: 11612
Epoch: [5]  [20/47]  eta: 0:00:59  lr: 0.000500  loss: 0.1650 (0.2061)  loss_classifier: 0.0486 (0.0491)  loss_box_reg: 0.0571 (0.0558)  loss_objectness: 0.0328 (0.0510)  loss_rpn_box_reg: 0.0286 (0.0502)  time: 2.0147  data: 0.1016  max mem: 11612
Epoch: [5]  [30/47]  eta: 0:00:36  lr: 0.000500  loss: 0.1679 (0.2005)  loss_classifier: 0.0482 (0.0492)  loss_box_reg: 0.0571 (0.0564)  loss_objectness: 0.0357 (0.0504)  loss_rpn_box_reg: 0.0297 (0.0444)  time: 2.0062  data: 0.0939  max mem: 11612
Epoch: [5]  [40/47]  eta: 0:00:14  lr: 0.000500  loss: 0.1733 (0.2062)  loss_classifier: 0.0482 (0.0501)  loss_box_reg: 0.0584 (0.0576)  loss_objectness: 0.0357 (0.0520)  loss_rpn_box_reg: 0.0308 (0.0465)  time: 1.9903  data: 0.0883  max mem: 11612
Epoch: [5]  [46/47]  eta: 0:00:02  lr: 0.000500  loss: 0.1719 (0.1997)  loss_classifier: 0.0474 (0.0491)  loss_box_reg: 0.0577 (0.0568)  loss_objectness: 0.0296 (0.0502)  loss_rpn_box_reg: 0.0324 (0.0436)  time: 1.9500  data: 0.0859  max mem: 11612
Epoch: [5] Total time: 0:01:37 (2.0794 s / it)
creating index...
index created!
Test:  [ 0/73]  eta: 0:01:03  model_time: 0.2935 (0.2935)  evaluator_time: 0.0215 (0.0215)  time: 0.8765  data: 0.5593  max mem: 11612
Test:  [72/73]  eta: 0:00:00  model_time: 0.1794 (0.1849)  evaluator_time: 0.0184 (0.0196)  time: 0.2228  data: 0.0184  max mem: 11612
Test: Total time: 0:00:17 (0.2342 s / it)
Averaged stats: model_time: 0.1794 (0.1849)  evaluator_time: 0.0184 (0.0196)
Accumulating evaluation results...
DONE (t=0.21s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.007
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.028
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.018
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.019
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.132
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.266
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.115
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.341
Epoch: [6]  [ 0/47]  eta: 0:04:55  lr: 0.000500  loss: 0.4149 (0.4149)  loss_classifier: 0.0467 (0.0467)  loss_box_reg: 0.0564 (0.0564)  loss_objectness: 0.2125 (0.2125)  loss_rpn_box_reg: 0.0993 (0.0993)  time: 6.2943  data: 3.6389  max mem: 11612
Epoch: [6]  [10/47]  eta: 0:01:28  lr: 0.000500  loss: 0.1741 (0.1985)  loss_classifier: 0.0455 (0.0466)  loss_box_reg: 0.0535 (0.0538)  loss_objectness: 0.0424 (0.0600)  loss_rpn_box_reg: 0.0298 (0.0380)  time: 2.3808  data: 0.4001  max mem: 11612
Epoch: [6]  [20/47]  eta: 0:00:59  lr: 0.000500  loss: 0.1664 (0.1831)  loss_classifier: 0.0474 (0.0491)  loss_box_reg: 0.0576 (0.0579)  loss_objectness: 0.0277 (0.0456)  loss_rpn_box_reg: 0.0246 (0.0305)  time: 1.9896  data: 0.0812  max mem: 11612
Epoch: [6]  [30/47]  eta: 0:00:36  lr: 0.000500  loss: 0.1628 (0.1845)  loss_classifier: 0.0512 (0.0501)  loss_box_reg: 0.0599 (0.0589)  loss_objectness: 0.0277 (0.0441)  loss_rpn_box_reg: 0.0229 (0.0313)  time: 1.9876  data: 0.0860  max mem: 11612
Epoch: [6]  [40/47]  eta: 0:00:14  lr: 0.000500  loss: 0.1752 (0.1824)  loss_classifier: 0.0507 (0.0503)  loss_box_reg: 0.0582 (0.0590)  loss_objectness: 0.0305 (0.0424)  loss_rpn_box_reg: 0.0232 (0.0307)  time: 1.9853  data: 0.0876  max mem: 11612
Epoch: [6]  [46/47]  eta: 0:00:02  lr: 0.000500  loss: 0.1778 (0.1939)  loss_classifier: 0.0489 (0.0498)  loss_box_reg: 0.0561 (0.0585)  loss_objectness: 0.0378 (0.0477)  loss_rpn_box_reg: 0.0387 (0.0380)  time: 1.9463  data: 0.0854  max mem: 11612
Epoch: [6] Total time: 0:01:37 (2.0658 s / it)
creating index...
index created!
Test:  [ 0/73]  eta: 0:00:59  model_time: 0.2517 (0.2517)  evaluator_time: 0.0395 (0.0395)  time: 0.8097  data: 0.5163  max mem: 11612
Test:  [72/73]  eta: 0:00:00  model_time: 0.1798 (0.1837)  evaluator_time: 0.0185 (0.0206)  time: 0.2209  data: 0.0167  max mem: 11612
Test: Total time: 0:00:17 (0.2333 s / it)
Averaged stats: model_time: 0.1798 (0.1837)  evaluator_time: 0.0185 (0.0206)
Accumulating evaluation results...
DONE (t=0.21s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.007
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.028
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.019
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.020
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.133
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.271
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.111
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.350
Epoch: [7]  [ 0/47]  eta: 0:04:28  lr: 0.000500  loss: 0.2052 (0.2052)  loss_classifier: 0.0510 (0.0510)  loss_box_reg: 0.0537 (0.0537)  loss_objectness: 0.0408 (0.0408)  loss_rpn_box_reg: 0.0597 (0.0597)  time: 5.7150  data: 3.3352  max mem: 11612
Epoch: [7]  [10/47]  eta: 0:01:27  lr: 0.000500  loss: 0.1677 (0.1893)  loss_classifier: 0.0488 (0.0489)  loss_box_reg: 0.0569 (0.0576)  loss_objectness: 0.0359 (0.0448)  loss_rpn_box_reg: 0.0351 (0.0381)  time: 2.3769  data: 0.4075  max mem: 11612
Epoch: [7]  [20/47]  eta: 0:00:59  lr: 0.000500  loss: 0.1558 (0.1754)  loss_classifier: 0.0488 (0.0495)  loss_box_reg: 0.0569 (0.0584)  loss_objectness: 0.0252 (0.0382)  loss_rpn_box_reg: 0.0216 (0.0292)  time: 2.0233  data: 0.1049  max mem: 11612
Epoch: [7]  [30/47]  eta: 0:00:36  lr: 0.000500  loss: 0.1549 (0.1872)  loss_classifier: 0.0509 (0.0498)  loss_box_reg: 0.0629 (0.0590)  loss_objectness: 0.0252 (0.0436)  loss_rpn_box_reg: 0.0171 (0.0348)  time: 1.9896  data: 0.0917  max mem: 11612
Epoch: [7]  [40/47]  eta: 0:00:14  lr: 0.000500  loss: 0.1631 (0.1859)  loss_classifier: 0.0503 (0.0495)  loss_box_reg: 0.0622 (0.0591)  loss_objectness: 0.0324 (0.0422)  loss_rpn_box_reg: 0.0242 (0.0351)  time: 1.9830  data: 0.0881  max mem: 11612
Epoch: [7]  [46/47]  eta: 0:00:02  lr: 0.000500  loss: 0.1687 (0.1843)  loss_classifier: 0.0460 (0.0490)  loss_box_reg: 0.0556 (0.0586)  loss_objectness: 0.0324 (0.0421)  loss_rpn_box_reg: 0.0313 (0.0346)  time: 1.9524  data: 0.0866  max mem: 11612
Epoch: [7] Total time: 0:01:37 (2.0675 s / it)
creating index...
index created!
Test:  [ 0/73]  eta: 0:00:59  model_time: 0.2362 (0.2362)  evaluator_time: 0.0201 (0.0201)  time: 0.8194  data: 0.5610  max mem: 11612
Test:  [72/73]  eta: 0:00:00  model_time: 0.1794 (0.1853)  evaluator_time: 0.0189 (0.0214)  time: 0.2195  data: 0.0184  max mem: 11612
Test: Total time: 0:00:17 (0.2380 s / it)
Averaged stats: model_time: 0.1794 (0.1853)  evaluator_time: 0.0189 (0.0214)
Accumulating evaluation results...
DONE (t=0.32s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.007
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.029
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.019
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.021
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.133
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.268
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.099
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.352
Epoch: [8]  [ 0/47]  eta: 0:04:15  lr: 0.000500  loss: 0.2326 (0.2326)  loss_classifier: 0.0594 (0.0594)  loss_box_reg: 0.0675 (0.0675)  loss_objectness: 0.0710 (0.0710)  loss_rpn_box_reg: 0.0347 (0.0347)  time: 5.4430  data: 3.1149  max mem: 11612
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
/tmp/ipykernel_17/88591243.py in <module>
      4 for epoch in range(num_epochs):
      5     # train for one epoch, printing every 10 iterations
----> 6     train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=10)
      7     # update the learning rate
      8     lr_scheduler.step()

/kaggle/working/engine.py in train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq, scaler)
     29         targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
     30         with torch.cuda.amp.autocast(enabled=scaler is not None):
---> 31             loss_dict = model(images, targets)
     32             losses = sum(loss for loss in loss_dict.values())
     33 

/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1108         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1109                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110             return forward_call(*input, **kwargs)
   1111         # Do not call functions when jit is used
   1112         full_backward_hooks, non_full_backward_hooks = [], []

/opt/conda/lib/python3.7/site-packages/torchvision/models/detection/generalized_rcnn.py in forward(self, images, targets)
     96         if isinstance(features, torch.Tensor):
     97             features = OrderedDict([("0", features)])
---> 98         proposals, proposal_losses = self.rpn(images, features, targets)
     99         detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)
    100         detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)  # type: ignore[operator]

/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1108         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1109                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110             return forward_call(*input, **kwargs)
   1111         # Do not call functions when jit is used
   1112         full_backward_hooks, non_full_backward_hooks = [], []

/opt/conda/lib/python3.7/site-packages/torchvision/models/detection/rpn.py in forward(self, images, features, targets)
    340         features = list(features.values())
    341         objectness, pred_bbox_deltas = self.head(features)
--> 342         anchors = self.anchor_generator(images, features)
    343 
    344         num_images = len(anchors)

/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1108         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1109                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110             return forward_call(*input, **kwargs)
   1111         # Do not call functions when jit is used
   1112         full_backward_hooks, non_full_backward_hooks = [], []

/opt/conda/lib/python3.7/site-packages/torchvision/models/detection/anchor_utils.py in forward(self, image_list, feature_maps)
    125                 torch.tensor(image_size[1] // g[1], dtype=torch.int64, device=device),
    126             ]
--> 127             for g in grid_sizes
    128         ]
    129         self.set_cell_anchors(dtype, device)

/opt/conda/lib/python3.7/site-packages/torchvision/models/detection/anchor_utils.py in <listcomp>(.0)
    125                 torch.tensor(image_size[1] // g[1], dtype=torch.int64, device=device),
    126             ]
--> 127             for g in grid_sizes
    128         ]
    129         self.set_cell_anchors(dtype, device)

KeyboardInterrupt: 
In [32]:
torch.save(model.state_dict(), 'faster_rcnn_state.pth')

Loading the Saved Weight¶

In [33]:
# load  a model; pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=False)

WEIGHTS_FILE = "./faster_rcnn_state.pth"

num_classes = 2

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Load the traines weights
model.load_state_dict(torch.load(WEIGHTS_FILE))

model = model.to(device)
In [36]:
def obj_detector(img):
    img = cv2.imread(img, cv2.IMREAD_COLOR)
    #img - cv2.resize(img,(256,256))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)

    img /= 255.0
    img = torch.from_numpy(img)
    img = img.unsqueeze(0)
    img = img.permute(0,3,1,2)
    
    model.eval()

    detection_threshold = 0.20
    
    img = list(im.to(device) for im in img)
    output = model(img)

    for i , im in enumerate(img):
        boxes = output[i]['boxes'].data.cpu().numpy()
        scores = output[i]['scores'].data.cpu().numpy()
        labels = output[i]['labels'].data.cpu().numpy()

        labels = labels[scores >= detection_threshold]
        boxes = boxes[scores >= detection_threshold].astype(np.int32)
        scores = scores[scores >= detection_threshold]

        boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
        boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
    
    sample = img[0].permute(1,2,0).cpu().numpy()
    sample = np.array(sample)
    boxes = output[0]['boxes'].data.cpu().numpy()
    name = output[0]['labels'].data.cpu().numpy()
    scores = output[0]['scores'].data.cpu().numpy()
    boxes = boxes[scores >= detection_threshold].astype(np.int32)
    names = name.tolist()
    
    #print(names, boxes, sample)
    return names, boxes, sample
In [37]:
pred_files = glob.glob("../input/drone-dataset-uav/drone_dataset_yolo/dataset_txt/*.jpg")

plt.figure(figsize=(20,80))
for i, images in enumerate(pred_files):
    if i > 19:break
    plt.subplot(10,2,i+1)
    names,boxes,sample = obj_detector(images)
    for i,box in enumerate(boxes):
        cv2.rectangle(sample,
                      (box[0], box[1]),
                      (box[2], box[3]),
                      (0, 220, 0), 2)
        cv2.putText(sample, classes[names[i]], (box[0],box[1]-5),cv2.FONT_HERSHEY_COMPLEX ,0.7,(220,0,0),2,cv2.LINE_AA)  

    plt.axis('off')
    plt.imshow(sample)
#     plt.savefig('save_image.png', bbox_inches='tight')  # if you want to save result
In [3]:
import cv2
import os

# Path to your annotated dataset
dataset_path = r"C:\Users\RAKESH SINGH\Downloads\archive"

# Iterate through each image and display it
for emergency_type in ["fire", "flood", "traffic_accident"]:
    images_path = os.path.join(dataset_path, emergency_type)
    for image_filename in os.listdir(images_path):
        image_path = os.path.join(images_path, image_filename)
        image = cv2.imread(image_path)
        cv2.imshow("Emergency Image", image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[3], line 10
      8 for emergency_type in ["fire", "flood", "traffic_accident"]:
      9     images_path = os.path.join(dataset_path, emergency_type)
---> 10     for image_filename in os.listdir(images_path):
     11         image_path = os.path.join(images_path, image_filename)
     12         image = cv2.imread(image_path)

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'C:\\Users\\RAKESH SINGH\\Downloads\\archive\\fire'
In [ ]:
 
In [ ]:
 
In [ ]: